
************************
** MERGE ALL DATASETS **
************************

*** Merge refugees and municipalities datasets ***

* Start from the refugee counts and attach the municipality-level variables
* (both files are matched one row per municipality).
use "final_refugees.dta", clear
merge 1:1 municipality using "final_municipalities.dta"
drop _merge

** create new combined variables

* Ratio of refugees over population, built for the total counts first and
* then for the ABO and EBO counts. Each wildcard is expanded when its inner
* loop starts, so the freshly created ratio_* variables are never picked up.
sort municipality
foreach stub in refugees refugeesABO refugeesEBO {
    foreach v of varlist `stub'_* {
        generate ratio_`v' = `v' / pop
        label variable ratio_`v' "ratio of `v' over population in each municipality"
    }
}

* average of ratio_refugees per month (mean across all municipalities)
foreach j in 01 02 03 04 05 06 07 08 09 10 11 12 {
    egen mratiorefugees_2015`j' = mean(ratio_refugees_2015`j')
    label variable mratiorefugees_2015`j' "average number of ratio refugees in month 2015`j' "
}

foreach j in 01 02 03 {
    egen mratiorefugees_2016`j' = mean(ratio_refugees_2016`j')
    label variable mratiorefugees_2016`j' "average number of ratio refugees in month 2016`j'"
}

* dummy if many or few refugees pre and post crisis
*   manyrefugeespre  = 1 if the ratio is above the monthly average in every
*                        month 201502-201508, 0 otherwise
*   manyrefugeespost = 1 if the ratio is above the monthly average in every
*                        month 201509-201602, 0 otherwise
* Stata treats missing as larger than any number, so a bare "ratio > mean"
* test is TRUE when the ratio is missing. The dummies are therefore set to
* missing whenever any ratio in the window is missing, instead of silently
* classifying such municipalities as "many refugees".
local pre_months  201502 201503 201504 201505 201506 201507 201508
local post_months 201509 201510 201511 201512 201601 201602

foreach period in pre post {
    gen manyrefugees`period' = 1
    foreach ym of local `period'_months {
        * one month at or below the average is enough to fail the test
        replace manyrefugees`period' = 0 if ratio_refugees_`ym' <= mratiorefugees_`ym'
    }
    foreach ym of local `period'_months {
        * undefined when any month in the window has no ratio
        replace manyrefugees`period' = . if missing(ratio_refugees_`ym')
    }
}

* dummies if always above/below average
* (left missing when either period dummy is missing)
gen treatfew  = (manyrefugeespre==0 & manyrefugeespost==0) if !missing(manyrefugeespre, manyrefugeespost)
gen treatmany = (manyrefugeespre==1 & manyrefugeespost==1) if !missing(manyrefugeespre, manyrefugeespost)

* dummies if switching from above(below) to below(above)
gen treatmanyfew = (manyrefugeespre==1 & manyrefugeespost==0) if !missing(manyrefugeespre, manyrefugeespost)
gen treatfewmany = (manyrefugeespre==0 & manyrefugeespost==1) if !missing(manyrefugeespre, manyrefugeespost)

* panel format (municipality and time)
* One stub list drives both the reshape and the renames below, so the two
* cannot drift apart. refugeesOTHERS_ was previously renamed without being
* reshaped, which makes the rename fail.
* NOTE(review): assumes refugeesOTHERS_* exist in wide form in the input
* data, like the other *OTHERS_ series - confirm against the source files.
local stubs housing_ refugeesABO_ refugeesEBO_ refugeesOTHERS_ refugees_ ///
    percABO_ percEBO_ percOTHERS_ gaprefugees_ availability_ ///
    ratio_refugees_ ratiod_refugees_ ///
    ratio_refugeesABO_ ratio_refugeesEBO_ ratio_refugeesOTHERS_

reshape long `stubs', i(municipality) j(time)

* replace the period codes produced by reshape with a running index
* 1, 2, ... within each municipality (chronological order)
sort municipality time
drop time
bysort municipality: gen time = _n

* strip the trailing underscore left over from the wide-format stubs
foreach s of local stubs {
    local clean = substr("`s'", 1, strlen("`s'") - 1)
    rename `s' `clean'
}

* housing per capita
generate housingpc = housing / pop

* Month-on-month changes are taken within municipality, in time order.
* The first observation of each municipality has no predecessor and is
* therefore missing.
sort municipality time

* monthly change in number refugees relative to pop
by municipality: generate change_ratio_refugees = ratio_refugees - ratio_refugees[_n-1]
label variable change_ratio_refugees "change in num refugees over pop in each month"

* monthly change in ABO,EBO refugees relative to pop
foreach grp in ABO EBO {
    by municipality: generate change_ratio_refugees`grp' = ratio_refugees`grp' - ratio_refugees`grp'[_n-1]
    label variable change_ratio_refugees`grp' "change in `grp' refugees over pop in each month"
}

* indicator variable for below/above median change in the number of refugees
* (relative to population) between time==8 and time==12
sort municipality time
preserve
keep if time==8 | time==12

* change from the time==8 row to the time==12 row of the same municipality;
* the (time) ordering is made explicit so the result does not depend on the
* sort order left behind by earlier commands
bysort municipality (time): gen change_ratio_refugees_crisis = ratio_refugees - ratio_refugees[_n-1]

* median across municipalities (egen ignores missing changes)
egen median_change_crisis = median(change_ratio_refugees_crisis)

* Stata treats missing as larger than any number, so an unguarded
* "change > median" would flag municipalities with a missing change as
* above the median. They are left missing instead.
gen above_median = (change_ratio_refugees_crisis > median_change_crisis) if !missing(change_ratio_refugees_crisis)

* keep one row per municipality (the time==12 row carries the change)
drop if time==8
keep municipality above_median
save "abovemedian.dta", replace
restore

* attach the indicator to every month of the municipality panel
merge m:1 municipality using "abovemedian.dta"
drop _merge

save "final_refugeesMunicip.dta", replace


*** Merge click data and refugees/municipality data ***

use "final_refugeesMunicip.dta", clear

* change time variable: running index 1..16 -> calendar month (YYYYMM),
* index 1 being 201412 and index 16 being 201603
local months 201412 201501 201502 201503 201504 201505 201506 201507 ///
             201508 201509 201510 201511 201512 201601 201602 201603
local t = 0
foreach ym of local months {
    local ++t
    replace time = `ym' if time == `t'
}

rename time pubmonth

sort municipality pubmonth

* Snapshot of the clean municipality-month panel. Every click file below is
* merged onto this snapshot, not onto the output of the previous merge
* (which would no longer be unique on municipality pubmonth).
tempfile panel
save `panel'

** main refugees articles dataset **

merge 1:m municipality pubmonth using "final_click.dta"
drop _merge

save "click_refugees.dta", replace

** all other datasets **

foreach i in total classified totalclassified classified1 classified2 other events accidents {

    use `panel', clear

    merge 1:m municipality pubmonth using "final_click_`i'.dta"
    drop _merge

    save "`i'_refugees.dta", replace

}


*** Create final datasets ***


** Main refugees articles dataset **

use "click_refugees.dta", clear

* remove municipalities flagged as having no clicks
drop if noclicks==1

* scale clicks measure (multiply by 100)
replace norviews = norviews * 100

* save dataset with outliers (before any outlier rows are removed)
save "final_datasetoutliers.dta", replace

* drop january 2015 and the click outliers
drop if pubmonth==201501
drop if municipality=="Hagfors" & pubmonth==201512
foreach m in Ale Dorotea Norsjo Svenljunga Tanum Trosa {
    drop if municipality=="`m'" & pubmonth==201502
}

* log variables (log of one plus the value)
generate lnorviews      = ln(norviews + 1)
generate lratiorefugees = ln(ratio_refugees + 1)

* trend: 1 for 201502 up to 13 for 201602, missing for any other month
generate trend = .
local k = 0
foreach ym in 201502 201503 201504 201505 201506 201507 201508 ///
              201509 201510 201511 201512 201601 201602 {
    local ++k
    replace trend = `k' if pubmonth == `ym'
}

* numeric municipality identifier, then declare the panel structure
encode municipality, gen(id)
xtset id pubmonth

save "final_dataset.dta", replace


** All other datasets **

* Apply the same cleaning steps to each of the remaining click datasets and
* save one final file per dataset.
foreach i in total classified totalclassified classified1 classified2 other events accidents {

    use "`i'_refugees.dta", clear

    * remove municipalities flagged as having no clicks
    drop if noclicks==1

    * scale clicks measure (multiply by 100)
    replace norviews = norviews * 100

    * drop january 2015 and the click outliers
    drop if pubmonth==201501
    drop if municipality=="Hagfors" & pubmonth==201512
    foreach m in Ale Dorotea Norsjo Svenljunga Tanum Trosa {
        drop if municipality=="`m'" & pubmonth==201502
    }

    * numeric municipality identifier, then declare the panel structure
    encode municipality, gen(id)
    xtset id pubmonth

    save "final_dataset_`i'.dta", replace

}
